required libraries

source("UnsupervisedAnalysis_Functions.R")
library(ggplot2)
library(circlize)
library(cowplot)
library(pracma)
library(circlize)
set.seed(123)

Loading and pre-processing the data

We load the data for the unsupervised analysis object that we produced using kmeans clustering

US <- readRDS("../data/USData.rds")

we add the B-Soid clustering data. we cut the first 300 frames (as we did with kmeans), since these are sometimes accompanied with noise from delayed animal placement / lighting changes etc. we create a USDataCheck report to inspect the complexity of the data and a USDataCheck to ensure that all labels have exactly the same number of frames for each file.

US <- AddFromBsoid(US,"../BSOID/Clustering1Data/",cut_start = 300,lab_name = "BSOID.8")
US <- AddFromBsoid(US,"../BSOID/Clustering2Data/",cut_start = 300,lab_name = "BSOID.27")
head(USDataReport(US))
USDataCheck(US)

we first smooth the data over 5 frames, this removes clusters that were identified in a single frame and ensures that beahvior trains are better resolved

US <- SmoothLabels_US(US, 5)

We calculate metrics, such as number of onset-offset and number of frames for each cluster.

US <- CalculateMetrics(US)

Next, we calculate the trainsitionsmatrix for each clustering analysis in each file

US <- AddTransitionMatrixData(US)

We finally calculate the confusion matrix between each clustering result across all files. this will allow us to compare different algorithms and see if they result in clusters that map strongly to each other

US <- AddConfusionMatrix(US)

We save this object, so next time we do not have to re-process it

saveRDS(US,"../data/USData.rds")

Analysis

Analysis of Clustering characteristics

In this section we will investigate how well clusters from different kmeans algorithms correlate to each other and to rearing beahavoir in mice

Mapping to rearing beahviors

PlotConfusionMatrix(US$ConfusionMatrix_norm$`BSOID-rear.classifier`,pointsize = 4, hclust = TRUE)

PlotConfusionMatrix(US$ConfusionMatrix_norm$`BSOID.27-rear.classifier`,pointsize = 4, hclust = TRUE)

PlotConfusionMatrix(US$ConfusionMatrix_norm$`kmeans.10-rear.classifier`,pointsize = 4, hclust = TRUE)

PlotConfusionMatrix(US$ConfusionMatrix_norm$`kmeans.25-rear.classifier`,pointsize = 4, hclust = TRUE)

PlotConfusionMatrix(US$ConfusionMatrix_norm$`kmeans.50-rear.classifier`,pointsize = 4, hclust = TRUE)

PlotConfusionMatrix(US$ConfusionMatrix_norm$`kmeans.100-rear.classifier`,pointsize = 4, hclust = TRUE)

we notice that with increase clusters numbers more specifically map to annotated beahviors in general

Mapping to rearing beahviors with svd prior to clustering

PlotConfusionMatrix(US$ConfusionMatrix_norm$`kmeans.svd.10-rear.classifier`,pointsize = 4, hclust = TRUE)

PlotConfusionMatrix(US$ConfusionMatrix_norm$`kmeans.svd.25-rear.classifier`,pointsize = 4, hclust = TRUE)

PlotConfusionMatrix(US$ConfusionMatrix_norm$`kmeans.svd.50-rear.classifier`,pointsize = 4, hclust = TRUE)

PlotConfusionMatrix(US$ConfusionMatrix_norm$`kmeans.svd.100-rear.classifier`,pointsize = 4, hclust = TRUE)

using svd prior to clustering does not alter the mapping to annotated behaviors strongly

kmeans to svd + kmeans clustering

PlotConfusionMatrix(US$ConfusionMatrix_norm$`kmeans.50-kmeans.svd.50`,pointsize = 4, hclust = TRUE)

kmeans and svd + kmeans finds quite different solutions and poor overlap for most clusters

kmeans vs kmeans with more clusters

PlotConfusionMatrix(US$ConfusionMatrix_norm$`kmeans.100-kmeans.50`,pointsize = 4, hclust = TRUE)

As we increase the number of clusters, some are retained, and some are further subdivided into multiple sub-clusters. mapping between kmeans 50 and kmeans 100 is much better than between kmeans 50 and kmeans + svd 50

Behavior flow of kmeans vs kmeans + svd

PlotBehaviorFlow(US,lab = "kmeans.25")

PlotBehaviorFlow(US,lab = "kmeans.svd.25")

PlotBehaviorFlow(US,lab = "BSOID.27")

Overall the transitions for the kmeans + svd cluster are not as well defined as the kmeans without svd

Sensitivit to resolve Phenotype (CSI)

US_CSI <-SplitUSData(US, select = (US$meta$Session == "PostCSI"))
US_CSI$Results <- NULL
PlotBehaviorFlow_Delta(US_CSI, US_CSI$meta$Condition == "CSI", lab = "kmeans.25")

PlotBehaviorFlow_Delta(US_CSI, US_CSI$meta$Condition == "CSI", lab = "kmeans.25", method = "up")

PlotBehaviorFlow_Delta(US_CSI, US_CSI$meta$Condition == "CSI", lab = "kmeans.25", method = "down")

US_CSI <- TwoGroupAnalysis(US_CSI, US_CSI$meta$Condition)
## [1] "no label specified, performing analysis for all"
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
#classical readouts
head(US_CSI$Results[[1]]$Statistics$raw)
##                                    name            p          FDR
## 16    bodycentre.center.distance.moving 5.386446e-07 4.883687e-05
## 23        bodycentre.center.transitions 5.536424e-07 4.883687e-05
## 15       bodycentre.center.raw.distance 1.436374e-06 7.322318e-05
## 19        bodycentre.center.time.moving 1.660199e-06 7.322318e-05
## 35         bodycentre.corners.raw.speed 2.126009e-05 7.080668e-04
## 40 bodycentre.corners.percentage.moving 2.858221e-05 7.080668e-04
#clusters stats
head(US_CSI$Results[[1]]$Statistics$kmeans.25)
##           name            p         FDR
## 40   X14.count 1.301842e-06 0.000210856
## 18   X21.count 1.874607e-06 0.000210856
## 17 X21.nframes 2.079875e-05 0.001559631
## 44    X3.count 3.091483e-05 0.001738652
## 46   X25.count 8.459415e-05 0.003806065
## 43  X3.nframes 1.025069e-03 0.038433304
#clusters Transition Stats
head(US_CSI$Results[[1]]$TransitionStats$kmeans.25$transitions)
##     from to      p.value       CSI   Control       FC        FDR
## 45    16 14 6.123591e-06 10.466667  6.206897 1.686296 0.01576857
## 497   14  3 1.549739e-05 19.733333 13.344828 1.478725 0.01995330
## 534    3 21 1.697303e-04 13.400000  9.344828 1.433948 0.14568817
## 548    3 25 3.802951e-04  8.033333  4.862069 1.652246 0.21421873
## 570   25 14 4.315642e-04  5.333333  3.206897 1.663082 0.21421873
## 20     7 14 5.105470e-04  8.666667  5.896552 1.469786 0.21421873
US$Results <- US_CSI$Results
PlotTransitionsStats(US_CSI)

Sensitivit to resolve Phenotype (FST 45min)

US_FST45 <-SplitUSData(US, select = (US$meta$Session == "FST45min"))
US_FST45$Results <- NULL
PlotBehaviorFlow_Delta(US_FST45, US_FST45$meta$Treatment == "Swim", lab = "kmeans.25")

PlotBehaviorFlow_Delta(US_FST45, US_FST45$meta$Treatment == "Swim", lab = "kmeans.25", method = "up")

PlotBehaviorFlow_Delta(US_FST45, US_FST45$meta$Treatment == "Swim", lab = "kmeans.25", method = "down")

US_FST45 <- TwoGroupAnalysis(US_FST45, US_FST45$meta$Treatment)
## [1] "no label specified, performing analysis for all"
#classical readouts
head(US_FST45$Results[[1]]$Statistics$raw)
##                                 name            p        FDR
## 10           bodycentre.speed.moving 0.0001262400 0.00721099
## 26    bodycentre.periphery.raw.speed 0.0001505386 0.00721099
## 7            bodycentre.raw.distance 0.0001867722 0.00721099
## 9               bodycentre.raw.speed 0.0001918230 0.00721099
## 8         bodycentre.distance.moving 0.0002125056 0.00721099
## 27 bodycentre.periphery.speed.moving 0.0002452436 0.00721099
#clusters stats
head(US_FST45$Results[[1]]$Statistics$kmeans.25)
##           name          p FDR
## 24    X1.count 0.01440250   1
## 23  X1.nframes 0.01882770   1
## 2     X7.count 0.02277431   1
## 27 X11.nframes 0.02443449   1
## 42    X9.count 0.02742777   1
## 28   X11.count 0.03081429   1
#clusters Transition Stats
head(US_FST45$Results[[1]]$TransitionStats$kmeans.25$transitions)
##     from to     p.value       Swim    Control        FC FDR
## 307   12 24 0.002928148  0.8000000  0.0000000       Inf   1
## 289    1 11 0.010457751 30.0666667 42.8666667 0.7013997   1
## 163   24 12 0.010467054  0.8000000  0.1333333 6.0000000   1
## 458    5  4 0.011236314  0.2000000  1.2666667 0.1578947   1
## 4      7 20 0.012817522  0.2666667  1.0666667 0.2500000   1
## 407   15 24 0.013410074  0.8000000  0.2000000 4.0000000   1
US$Results <- append(US$Results, US_FST45$Results)
PlotTransitionsStats(US_FST45)

Sensitivit to resolve Phenotype (FST 24h)

US_FST24 <-SplitUSData(US, select = (US$meta$Session == "FST24h"))
US_FST24$Results <- NULL
PlotBehaviorFlow_Delta(US_FST24, US_FST24$meta$Treatment == "Swim", lab = "kmeans.25")

PlotBehaviorFlow_Delta(US_FST24, US_FST24$meta$Treatment == "Swim", lab = "kmeans.25", method = "up")

PlotBehaviorFlow_Delta(US_FST24, US_FST24$meta$Treatment == "Swim", lab = "kmeans.25", method = "down")

US_FST24 <- TwoGroupAnalysis(US_FST24, US_FST24$meta$Treatment)
## [1] "no label specified, performing analysis for all"
#classical readouts
head(US_FST24$Results[[1]]$Statistics$raw)
##                                    name          p FDR
## 27    bodycentre.periphery.speed.moving 0.02327161   1
## 10              bodycentre.speed.moving 0.02915563   1
## 12                bodycentre.total.time 0.07172900   1
## 34   bodycentre.corners.distance.moving 0.08488321   1
## 25 bodycentre.periphery.distance.moving 0.08596782   1
## 24    bodycentre.periphery.raw.distance 0.08880880   1
#clusters stats
head(US_FST24$Results[[1]]$Statistics$kmeans.25)
##           name          p       FDR
## 30   X22.count 0.00224739 0.5055735
## 14   X24.count 0.01139820 1.0000000
## 31 X17.nframes 0.01935312 1.0000000
## 32   X17.count 0.02700361 1.0000000
## 13 X24.nframes 0.03021485 1.0000000
## 36    X2.count 0.03056139 1.0000000
#clusters Transition Stats
head(US_FST24$Results[[1]]$TransitionStats$kmeans.25$transitions)
##     from to     p.value       Swim     Control          FC FDR
## 161   24 13 0.001821792 0.93333333  2.33333333  0.40000000   1
## 377   17 16 0.002583943 0.66666667  0.06666667 10.00000000   1
## 70    10 14 0.005341900 0.06666667  1.13333333  0.05882353   1
## 110    8  6 0.008736777 8.53333333 11.53333333  0.73988439   1
## 390   17 22 0.008791038 1.06666667  2.40000000  0.44444444   1
## 307   12 24 0.013517382 0.46666667  0.00000000         Inf   1
US$Results <- append(US$Results, US_FST24$Results)
PlotTransitionsStats(US_FST24)

Sensitivit to resolve Phenotype (Tunnel Handling)

US_Tun <-SplitUSData(US, select = (US$meta$Session == "PreYoh"))
US_Tun$Results <- NULL
PlotBehaviorFlow_Delta(US_Tun, US_Tun$meta$Condition == "Tunnel", lab = "kmeans.25")

PlotBehaviorFlow_Delta(US_Tun, US_Tun$meta$Condition == "Tunnel", lab = "kmeans.25",method = "up")

PlotBehaviorFlow_Delta(US_Tun, US_Tun$meta$Condition == "Tunnel", lab = "kmeans.25", method = "down")

US_Tun <- TwoGroupAnalysis(US_Tun, US_Tun$meta$Condition)
## [1] "no label specified, performing analysis for all"
#classical readouts
head(US_Tun$Results[[1]]$Statistics$raw)
##                                 name            p         FDR
## 26    bodycentre.periphery.raw.speed 3.004594e-05 0.002390231
## 8         bodycentre.distance.moving 4.077278e-05 0.002390231
## 7            bodycentre.raw.distance 5.419182e-05 0.002390231
## 9               bodycentre.raw.speed 5.578236e-05 0.002390231
## 35      bodycentre.corners.raw.speed 7.240348e-05 0.002390231
## 27 bodycentre.periphery.speed.moving 8.129103e-05 0.002390231
#clusters stats
head(US_Tun$Results[[1]]$Statistics$kmeans.25)
##          name           p       FDR
## 1  X7.nframes 0.002877083 0.2919733
## 44   X3.count 0.003760699 0.2919733
## 43 X3.nframes 0.003893665 0.2919733
## 2    X7.count 0.005646607 0.3175655
## 28  X11.count 0.007882055 0.3546299
## 24   X1.count 0.013144514 0.4679903
#clusters Transition Stats
head(US_Tun$Results[[1]]$TransitionStats$kmeans.25$transitions)
##     from to      p.value Tunnel Control        FC       FDR
## 326   11  7 8.264637e-05  36.65   25.25 1.4514851 0.2234578
## 289    1 11 2.104314e-03  41.10   31.45 1.3068362 1.0000000
## 109    8 21 5.168102e-03   0.80    1.80 0.4444444 1.0000000
## 549    3 23 5.218830e-03   8.30    4.30 1.9302326 1.0000000
## 497   14  3 5.478437e-03  21.10   16.80 1.2559524 1.0000000
## 262   13  1 5.493013e-03   6.20    3.85 1.6103896 1.0000000
US$Results <- append(US$Results, US_Tun$Results)
PlotTransitionsStats(US_Tun)

Sensitivit to resolve Phenotype (Shock anxiety)

US_SA <-SplitUSData(US, select = (US$meta$Session == "PostSA"))
US_SA$Results <- NULL
PlotBehaviorFlow_Delta(US_SA, US_SA$meta$Treatment == "SA", lab = "kmeans.25")

PlotBehaviorFlow_Delta(US_SA, US_SA$meta$Treatment == "SA", lab = "kmeans.25", method = "up")

PlotBehaviorFlow_Delta(US_SA, US_SA$meta$Treatment == "SA", lab = "kmeans.25", method = "down")

US_SA <- TwoGroupAnalysis(US_SA, US_SA$meta$Treatment)
## [1] "no label specified, performing analysis for all"
#classical readouts
head(US_SA$Results[[1]]$Statistics$raw)
##                                      name            p         FDR
## 31 bodycentre.periphery.percentage.moving 8.544603e-05 0.006290669
## 40   bodycentre.corners.percentage.moving 9.503969e-05 0.006290669
## 35           bodycentre.corners.raw.speed 1.069719e-04 0.006290669
## 26         bodycentre.periphery.raw.speed 2.851125e-04 0.007474227
## 13             bodycentre.time.stationary 2.933267e-04 0.007474227
## 14           bodycentre.percentage.moving 2.957890e-04 0.007474227
#clusters stats
head(US_SA$Results[[1]]$Statistics$kmeans.25)
##          name           p       FDR
## 40  X14.count 0.001466045 0.3298018
## 46  X25.count 0.003328359 0.3743743
## 28  X11.count 0.007608333 0.4331980
## 41 X9.nframes 0.007702657 0.4331980
## 44   X3.count 0.012313689 0.5026469
## 42   X9.count 0.013406284 0.5026469
#clusters Transition Stats
head(US_SA$Results[[1]]$TransitionStats$kmeans.25$transitions)
##     from to      p.value        SA    Control        FC FDR
## 483   14  4 0.0004480593 1.2333333  2.5517241 0.4833333   1
## 497   14  3 0.0031154643 8.6333333 11.7241379 0.7363725   1
## 557   25 24 0.0036088107 0.1333333  0.6206897 0.2148148   1
## 420   15 14 0.0044014861 3.0333333  4.8275862 0.6283333   1
## 411   15 13 0.0047287440 0.7666667  0.2413793 3.1761905   1
## 86    20 13 0.0055192157 4.3333333  2.8965517 1.4960317   1
US$Results <- append(US$Results, US_SA$Results)
PlotTransitionsStats(US_SA)

Sensitivit to resolve Phenotype (Yohimbine)

US_Yoh <-SplitUSData(US, select = (US$meta$Session == "PostYoh"))
US_Yoh$Results <- NULL
PlotBehaviorFlow_Delta(US_Yoh, US_Yoh$meta$Treatment == "Yohimbin", lab = "kmeans.25")

PlotBehaviorFlow_Delta(US_Yoh, US_Yoh$meta$Treatment == "Yohimbin", lab = "kmeans.25", method = "up")

PlotBehaviorFlow_Delta(US_Yoh, US_Yoh$meta$Treatment == "Yohimbin", lab = "kmeans.25", method = "down")

US_Yoh <- TwoGroupAnalysis(US_Yoh, US_Yoh$meta$Treatment)
## [1] "no label specified, performing analysis for all"
#classical readouts
head(US_Yoh$Results[[1]]$Statistics$raw)
##                               name            p         FDR
## 2  classifications.Supported.count 6.183963e-06 0.001090976
## 36 bodycentre.corners.speed.moving 5.208191e-05 0.003886288
## 1   classifications.Supported.time 7.699250e-05 0.003886288
## 18  bodycentre.center.speed.moving 1.078355e-04 0.003886288
## 4       classifications.None.count 1.101429e-04 0.003886288
## 35    bodycentre.corners.raw.speed 1.575135e-04 0.004631430
#clusters stats
head(US_Yoh$Results[[1]]$Statistics$kmeans.25)
##          name            p        FDR
## 1  X7.nframes 0.0001877460 0.03729122
## 2    X7.count 0.0003315361 0.03729122
## 6   X10.count 0.0008877689 0.05256664
## 16   X4.count 0.0010335411 0.05256664
## 18  X21.count 0.0011683538 0.05256664
## 28  X11.count 0.0018196153 0.06822353
#clusters Transition Stats
head(US_Yoh$Results[[1]]$TransitionStats$kmeans.25$transitions)
##     from to      p.value Saline  Yohimbin        FC       FDR
## 356   22 18 0.0001299529    1.4  3.966667 0.3529412 0.2826007
## 289    1 11 0.0002114144   31.3 17.933333 1.7453532 0.2826007
## 62    10  1 0.0003685239   11.7  6.733333 1.7376238 0.3284074
## 326   11  7 0.0010857317   29.8 12.033333 2.4764543 0.6922640
## 430    2  8 0.0014258274    0.0  0.300000 0.0000000 0.6922640
## 5      7  8 0.0015536542   11.2  4.966667 2.2550336 0.6922640
US$Results <- append(US$Results, US_Yoh$Results)
saveRDS(US,"../data/USData_Results.rds")
PlotTransitionsStats(US_Yoh)